#setwd('/afs/inf.ed.ac.uk/user/s17/s1725186/Documents/PhD-Models/FirstPUModel/RMarkdowns')
library(tidyverse) ; library(reshape2) ; library(glue) ; library(plotly) ; library(dendextend)
library(RColorBrewer) ; library(viridis) ; require(gridExtra) ; library(GGally) ; library(ggpubr)
library(expss)
library(polycor)
library(foreach) ; library(doParallel)
library(knitr) ; library(kableExtra)
library(biomaRt)
library(clusterProfiler) ; library(ReactomePA) ; library(DOSE) ; library(org.Hs.eg.db)
library(WGCNA)
SFARI_colour_hue = function(r) {
pal = c('#FF7631','#FFB100','#E8E328','#8CC83F','#62CCA6','#59B9C9','#b3b3b3','#808080','gray','#d9d9d9')[r]
}
Load preprocessed dataset (preprocessing code in 01_data_preprocessing.Rmd) and clustering (pipeline in 05_WGCNA.Rmd)
# Gandal dataset
load('./../Data/preprocessed_data.RData')
datExpr = datExpr %>% data.frame
rownames(datExpr) = datGenes$ensembl_gene_id
DE_info = DE_info %>% data.frame
# GO Neuronal annotations: regex 'neuron' in GO functional annotations and label the genes that make a match as neuronal
GO_annotations = read.csv('./../Data/genes_GO_annotations.csv')
GO_neuronal = GO_annotations %>% filter(grepl('neuron', go_term)) %>%
mutate('ID'=as.character(ensembl_gene_id)) %>%
dplyr::select(-ensembl_gene_id) %>% distinct(ID) %>%
mutate('Neuronal'=1)
# SFARI Genes
SFARI_genes = read_csv('./../../../SFARI/Data/SFARI_genes_01-03-2020_w_ensembl_IDs.csv')
SFARI_genes = SFARI_genes[!duplicated(SFARI_genes$ID) & !is.na(SFARI_genes$ID),]
# Clusterings
clusterings = read_csv('./../Data/clusters.csv')
# Update DE_info with SFARI and Neuronal information
genes_info = DE_info %>% mutate('entrezgene'=rownames(.) %>% as.numeric) %>%
dplyr::rename('padj' = adj.P.Val, 'log2FoldChange' = logFC) %>%
left_join(datGenes %>% dplyr::select(entrezgene, ensembl_gene_id) %>%
dplyr::rename('ID' = ensembl_gene_id), by = 'entrezgene') %>%
left_join(SFARI_genes, by='ID') %>%
mutate(`gene-score`=ifelse(is.na(`gene-score`), 'Others', `gene-score`)) %>%
distinct(ID, .keep_all = TRUE) %>% left_join(GO_neuronal, by='ID') %>%
mutate(Neuronal=ifelse(is.na(Neuronal), 0, Neuronal)) %>%
left_join(clusterings, by='ID') %>%
mutate(gene.score=ifelse(`gene-score`=='Others' & Neuronal==1, 'Neuronal', `gene-score`),
significant=padj<0.05 & !is.na(padj))
# Add gene symbol
getinfo = c('ensembl_gene_id','external_gene_id')
mart = useMart(biomart='ENSEMBL_MART_ENSEMBL', dataset='hsapiens_gene_ensembl',
host='feb2014.archive.ensembl.org') ## Gencode v19
gene_names = getBM(attributes=getinfo, filters=c('ensembl_gene_id'), values=genes_info$ID, mart=mart)
genes_info = genes_info %>% left_join(gene_names, by=c('ID'='ensembl_gene_id'))
clustering_selected = 'DynamicHybrid'
genes_info$Module = genes_info[,clustering_selected]
dataset = read.csv(paste0('./../Data/dataset_', clustering_selected, '.csv'))
dataset$Module = dataset[,clustering_selected]
load('./../Data/GSEA.RData')
GSEA_SFARI = enrichment_SFARI
load('./../Data/ORA.RData')
ORA_SFARI = enrichment_SFARI
rm(DE_info, GO_annotations, clusterings, getinfo, mart, efit, enrichment_DGN, enrichment_DO, enrichment_GO,
enrichment_KEGG, enrichment_Reactome, enrichment_SFARI)
We have results both from GSEA and ORA to measure the enrichment of SFARI Genes in each module, and they both agree with each other relatively well
SFARI_genes_by_module = c()
for(module in names(GSEA_SFARI)){
GSEA_info = GSEA_SFARI[[module]] %>% dplyr::select(ID, pvalue, p.adjust, NES) %>%
mutate(pvalue = ifelse(NES>0, pvalue, 1-pvalue),
p.adjust = ifelse(NES>0, p.adjust, 1)) %>%
dplyr::rename('GSEA_pval' = pvalue, 'GSEA_padj'= p.adjust)
ORA_info = ORA_SFARI[[module]] %>% dplyr::select(ID, pvalue, p.adjust, qvalue, GeneRatio, Count) %>%
dplyr::rename('ORA_pval' = pvalue, 'ORA_padj' = p.adjust)
module_info = GSEA_info %>% full_join(ORA_info, by = 'ID') %>% add_column(.before = 'ID', Module = module)
SFARI_genes_by_module = rbind(SFARI_genes_by_module, module_info)
}
SFARI_genes_by_module = SFARI_genes_by_module %>%
left_join(dataset %>% dplyr::select(Module, MTcor) %>%
group_by(Module,MTcor) %>% tally %>% ungroup, by = 'Module') %>%
mutate(ORA_pval = ifelse(is.na(ORA_pval), 1, ORA_pval),
ORA_padj = ifelse(is.na(ORA_padj), 1, ORA_padj))
plot_data = SFARI_genes_by_module %>% filter(ID=='SFARI')
ggplotly(plot_data %>% ggplot(aes(1-GSEA_pval, 1-ORA_pval, size = n)) +
geom_point(color = plot_data$Module, alpha = .6, aes(id=Module)) +
geom_smooth(se=FALSE, color = '#CCCCCC') +
xlab('GSEA Enrichment') + ylab('ORA Enrichment') + coord_fixed() +
ggtitle(paste0('Corr = ', round(cor(plot_data$GSEA_pval, plot_data$ORA_pval),2))) +
theme_minimal() + theme(legend.position = 'none'))
To determine which modules have a statistically significant enrichment in SFARI Genes we can use the adjusted p-values. We used the Bonferroni correction for this.
GSEA identifies 51/179 as significant. This doesn’t make sense
ggplotly(plot_data %>% ggplot(aes(GSEA_padj, ORA_padj, size = n)) +
geom_point(color = plot_data$Module, alpha = .7, aes(id=Module)) +
geom_smooth(se=FALSE, color = '#CCCCCC') +
geom_hline(yintercept = 0.01, color = 'gray', linetype = 'dashed') +
geom_vline(xintercept = 0.01, color = 'gray', linetype = 'dashed') +
xlab('GSEA adjusted p-value') + ylab('ORA adjusted p-value') +
scale_x_log10(limits = c(min(plot_data$GSEA_padj, plot_data$ORA_padj),1.2)) +
scale_y_log10(limits = c(min(plot_data$GSEA_padj, plot_data$ORA_padj),1.2)) +
ggtitle(paste0('Corr = ',round(cor(plot_data$GSEA_padj, plot_data$ORA_padj),2))) + coord_fixed() +
theme_minimal() + theme(legend.position = 'none'))
plot_data = plot_data %>% mutate(GSEA_sig = GSEA_padj<0.01, ORA_sig = ORA_padj<0.01) %>%
apply_labels(GSEA_sig = 'GSEA significant enrichment',
ORA_sig = 'ORA significant enrichment')
cro(plot_data$GSEA_sig, list(plot_data$ORA_sig, total()))
| ORA significant enrichment | #Total | |||
|---|---|---|---|---|
| FALSE | TRUE | |||
| GSEA significant enrichment | ||||
| FALSE | 127 | 1 | 128 | |
| TRUE | 48 | 3 | 51 | |
| #Total cases | 175 | 4 | 179 | |
The ‘over-enrichment’ in SFARI Modules in GSEA could be because SFARI Genes have in general higher Module Memberships than the other genes, which would make them cluster at the beginning of the list constantly and would bias the enrichment analysis.
Looking at the plot below, we can see that there is not a uniform distribution of SFARI genes across all quantiles of the Module Membership values, but they instead seem to cluster around Module Membership values with high magnitudes (both positive and negative), so I don’t think the GSEA results for the SFARI genes are valid.
Because of this, I’m going to use the enrichment from the ORA to study the SFARI Genes
quant_data = dataset %>% dplyr::select(ID, contains('MM.')) %>%
left_join(genes_info %>% dplyr::select(ID, gene.score), by = 'ID') %>% dplyr::select(-ID) %>%
melt %>% mutate(quant = cut(value, breaks = quantile(value, probs = seq(0,1,0.05)) %>%
as.vector, labels = FALSE)) %>%
group_by(gene.score, quant) %>% tally %>% ungroup %>% ungroup
quant_data = quant_data %>% group_by(quant) %>% summarise(N = sum(n)) %>% ungroup %>%
left_join(quant_data, by = 'quant') %>% dplyr::select(quant, gene.score, n, N) %>%
mutate(p = round(100*n/N,2)) %>% filter(!is.na(quant)) %>%
mutate(gene.score = factor(gene.score, levels = rev(c('1','2','3','Neuronal','Others'))))
ggplotly(quant_data %>% filter(!gene.score %in% c('Neuronal','Others')) %>%
ggplot(aes(quant, p, fill = gene.score)) + geom_bar(stat='identity') +
xlab('Module Membership Quantiles') + ylab('% of SFARI Genes in Quantile') +
ggtitle('Percentage of Genes labelled as SFARI in each Quantile') +
scale_fill_manual(values = SFARI_colour_hue(r=rev(c(1:3)))) +
theme_minimal() + theme(legend.position = 'none'))
rm(quant_data)
Selecting modules with an adjusted p-value below 0.01 using the ORA
ggplotly(plot_data %>% ggplot(aes(MTcor, ORA_padj, size=n)) +
geom_point(color=plot_data$Module, alpha=0.5, aes(id=Module)) +
geom_hline(yintercept = 0.01, color = 'gray', linetype = 'dotted') +
xlab('Module-Diagnosis Correlation') + ylab('Corrected p-values') + scale_y_log10() +
ggtitle('Modules Significantly Enriched in SFARI Genes') +
theme_minimal() + theme(legend.position = 'none'))
top_modules = plot_data %>% arrange(desc(ORA_padj)) %>% filter(ORA_padj<0.01) %>% pull(Module) %>% as.character
plot_data %>% filter(Module %in% top_modules) %>% arrange(ORA_pval) %>%
dplyr::select(Module, MTcor, ORA_pval, ORA_padj, qvalue, GeneRatio, Count) %>%
rename( ORA_pval = 'p-value', ORA_padj = 'Adjusted p-value') %>%
kable %>% kable_styling(full_width = F)
| Module | MTcor | p-value | Adjusted p-value | qvalue | GeneRatio | Count |
|---|---|---|---|---|---|---|
| #A68AFF | 0.2417718 | 0.0000981 | 0.0004904 | 0.0001033 | 7/24 | 7 |
| #00BBDC | 0.3504563 | 0.0002836 | 0.0014180 | 0.0008956 | 7/28 | 7 |
| #00BF76 | -0.4033879 | 0.0007926 | 0.0039631 | 0.0016687 | 10/63 | 10 |
| #26B700 | -0.1792697 | 0.0016871 | 0.0050612 | 0.0008879 | 5/19 | 5 |
pca = datExpr %>% prcomp
plot_data = data.frame('ID'=rownames(datExpr), 'PC1' = pca$x[,1], 'PC2' = pca$x[,2]) %>%
left_join(dataset, by='ID') %>%
left_join(genes_info %>% dplyr::select(ID, external_gene_id), by='ID') %>%
dplyr::select(ID, external_gene_id, PC1, PC2, Module, gene.score) %>%
mutate(ImportantModules = ifelse(Module %in% top_modules, as.character(Module), 'Others')) %>%
mutate(color = ifelse(ImportantModules=='Others','gray',ImportantModules),
alpha = ifelse(ImportantModules=='Others', 0.1, 1),
gene_id = paste0(ID, ' (', external_gene_id, ')')) %>%
apply_labels(ImportantModules = 'Top Modules')
plot_data %>% ggplot(aes(PC1, PC2, color=ImportantModules)) +
geom_point(alpha=plot_data$alpha, color=plot_data$color, aes(ID=gene_id)) + theme_minimal() +
xlab(paste0('PC1 (',round(100*summary(pca)$importance[2,1],2),'%)')) +
ylab(paste0('PC2 (',round(100*summary(pca)$importance[2,2],2),'%)')) +
ggtitle('Genes belonging to the Modules with the highest SFARI Genes Enrichment')
rm(pca)
Following the WGCNA pipeline, selecting the genes with the highest Module Membership and Gene Significance
Ordered by \(\frac{MM+|GS|}{2}\)
There aren’t that many SFARI genes in the top genes of the modules
create_table = function(module){
top_genes = dataset %>% left_join(genes_info %>% dplyr::select(ID, external_gene_id), by='ID') %>%
dplyr::select(ID, external_gene_id, paste0('MM.',gsub('#','',module)), GS, gene.score) %>%
filter(dataset$Module==module) %>% dplyr::rename('MM' = paste0('MM.',gsub('#','',module))) %>%
mutate(Relevance = (MM+abs(GS))/2) %>% arrange(by=-Relevance) %>% top_n(20) %>%
dplyr::rename('Gene Symbol' = external_gene_id, 'SFARI Score' = gene.score)
return(top_genes)
}
top_genes = list()
for(i in 1:length(top_modules)) top_genes[[i]] = create_table(top_modules[i])
kable(top_genes[[1]] %>% dplyr::select(-ID), caption=paste0('Top 20 genes for Module ', top_modules[1],
' (MTcor = ', round(dataset$MTcor[dataset$Module == top_modules[1]][1],2),')')) %>%
kable_styling(full_width = F)
| Gene Symbol | MM | GS | SFARI Score | Relevance |
|---|---|---|---|---|
| HLA-DRA | 0.8747479 | -0.3649290 | Others | 0.6198384 |
| CD74 | 0.9157544 | -0.1742287 | Others | 0.5449915 |
| HLA-DRB1 | 0.8532709 | 0.0972571 | 3 | 0.4752640 |
| HLA-DPA1 | 0.8156707 | -0.1306145 | Others | 0.4731426 |
| CIITA | 0.6334165 | -0.3008236 | Others | 0.4671200 |
| HLA-DOA | 0.8283354 | -0.0942383 | Others | 0.4612868 |
| HLA-DPB1 | 0.7835951 | -0.1023942 | 3 | 0.4429947 |
| FGL2 | 0.6498709 | -0.2184730 | Others | 0.4341720 |
| HAVCR2 | 0.6660515 | -0.1266919 | Others | 0.3963717 |
| GLTSCR1L | 0.4917052 | -0.1907723 | Others | 0.3412388 |
| HLA-DQA1 | 0.4583689 | -0.0686622 | Others | 0.2635156 |
| CARTPT | 0.4456341 | 0.0345264 | Others | 0.2400802 |
| APOBEC3G | 0.4263521 | 0.0301908 | Others | 0.2282714 |
| YWHAQ | 0.4042128 | -0.0387991 | Others | 0.2215059 |
| SLC27A2 | 0.3672742 | 0.0453783 | Others | 0.2063262 |
kable(top_genes[[2]] %>% dplyr::select(-ID), caption=paste0('Top 20 genes for Module ', top_modules[2],
' (MTcor = ', round(dataset$MTcor[dataset$Module == top_modules[2]][1],2),')')) %>%
kable_styling(full_width = F)
| Gene Symbol | MM | GS | SFARI Score | Relevance |
|---|---|---|---|---|
| CHD7 | 0.8185871 | -0.5338120 | 1 | 0.6761995 |
| GPR116 | 0.6712372 | -0.5646098 | Others | 0.6179235 |
| DNAJC21 | 0.7593297 | -0.4733788 | Others | 0.6163542 |
| UIMC1 | 0.7666779 | -0.4329504 | 3 | 0.5998142 |
| SPECC1 | 0.8511586 | -0.3400076 | Others | 0.5955831 |
| PTK2 | 0.7045439 | -0.4591038 | Others | 0.5818239 |
| ZNF608 | 0.7629912 | -0.3716821 | Others | 0.5673366 |
| DSC1 | 0.6959402 | -0.4344299 | Others | 0.5651850 |
| PHF21A | 0.7433904 | -0.3700826 | 1 | 0.5567365 |
| EHMT1 | 0.6989350 | -0.4077613 | 1 | 0.5533482 |
| TTLL5 | 0.7486423 | -0.2941657 | Others | 0.5214040 |
| TRUB2 | 0.4645929 | -0.5391575 | Others | 0.5018752 |
| RAD54B | 0.6086840 | -0.3338852 | Others | 0.4712846 |
| NKIRAS1 | 0.4654338 | -0.4735435 | Others | 0.4694886 |
| MAP4 | 0.6561171 | -0.2630500 | Others | 0.4595836 |
| BCAN | 0.5210378 | -0.3559353 | Others | 0.4384866 |
| CDK5RAP2 | 0.7024048 | -0.1651121 | Others | 0.4337585 |
| ING2 | 0.5735163 | -0.2932299 | Others | 0.4333731 |
| CCDC7 | 0.6639173 | -0.1834586 | Others | 0.4236879 |
| RNF220 | 0.5499203 | -0.2943829 | Others | 0.4221516 |
kable(top_genes[[3]] %>% dplyr::select(-ID), caption=paste0('Top 20 genes for Module ', top_modules[3],
' (MTcor = ', round(dataset$MTcor[dataset$Module == top_modules[3]][1],2),')')) %>%
kable_styling(full_width = F)
| Gene Symbol | MM | GS | SFARI Score | Relevance |
|---|---|---|---|---|
| MACF1 | 0.8659623 | 0.2645816 | Others | 0.5652719 |
| NXPE2 | 0.6461758 | 0.4527530 | Others | 0.5494644 |
| SETD5 | 0.7530549 | 0.3282670 | 1 | 0.5406610 |
| KALRN | 0.6547142 | 0.3550189 | Others | 0.5048666 |
| DST | 0.6684159 | 0.3320908 | 3 | 0.5002533 |
| VAMP5 | 0.6552634 | 0.3415350 | Others | 0.4983992 |
| NIN | 0.8427420 | 0.1529265 | Others | 0.4978343 |
| PHF3 | 0.4558986 | 0.5183742 | 1 | 0.4871364 |
| SHC1 | 0.6521559 | 0.2736955 | Others | 0.4629257 |
| PSG9 | 0.5825269 | 0.3258620 | Others | 0.4541944 |
| ITPR2 | 0.5174786 | 0.3377800 | Others | 0.4276293 |
| SYNE1 | 0.6390105 | 0.1993551 | 3 | 0.4191828 |
| CUL7 | 0.5084846 | 0.3253475 | 2 | 0.4169160 |
| CDK10 | 0.4796263 | 0.2977635 | Others | 0.3886949 |
| DLG2 | 0.5819806 | 0.1746726 | 2 | 0.3783266 |
| CHST9 | 0.5701735 | -0.1747792 | Others | 0.3724764 |
| LRRC37A3 | 0.6552248 | 0.0470024 | Others | 0.3511136 |
| TRIO | 0.6781371 | -0.0019589 | 1 | 0.3400480 |
| MARCH3 | 0.4647628 | 0.2058328 | Others | 0.3352978 |
| PLEKHM1 | 0.4858289 | -0.0370002 | Others | 0.2614146 |
rm(create_table, i)
The top genes tend to have a relatively high mean level of expression
pca = datExpr %>% prcomp
ids = c()
for(tg in top_genes) ids = c(ids, tg$ID)
plot_data = data.frame('ID'=rownames(datExpr), 'PC1' = pca$x[,1], 'PC2' = pca$x[,2]) %>%
left_join(dataset, by='ID') %>% dplyr::select(ID, PC1, PC2, Module, gene.score) %>%
mutate(color = ifelse(Module %in% top_modules, as.character(Module), 'gray')) %>%
mutate(alpha = ifelse(color %in% top_modules & ID %in% ids, 1, 0.1))
plot_data %>% ggplot(aes(PC1, PC2)) + geom_point(alpha=plot_data$alpha, color=plot_data$color) +
xlab(paste0('PC1 (',round(100*summary(pca)$importance[2,1],2),'%)')) +
ylab(paste0('PC2 (',round(100*summary(pca)$importance[2,2],2),'%)')) +
theme_minimal() + ggtitle('Most relevant genes for top Modules')
rm(ids, pca, tg, plot_data)
Using the package clusterProfiler. Performing Gene Set Enrichment Analysis (GSEA) and Over Representation Analysis (ORA) using the following datasets:
Gene Ontology
Disease Ontology
Disease Gene Network
KEGG
REACTOME
# GSEA
load('./../Data/GSEA.RData')
# Rename lists
GSEA_GO = enrichment_GO
GSEA_DGN = enrichment_DGN
GSEA_DO = enrichment_DO
GSEA_KEGG = enrichment_KEGG
GSEA_Reactome = enrichment_Reactome
GSEA_SFARI = enrichment_SFARI
# ORA
load('./../Data/ORA.RData')
# Rename lists
ORA_GO = enrichment_GO
ORA_DGN = enrichment_DGN
ORA_DO = enrichment_DO
ORA_KEGG = enrichment_KEGG
ORA_Reactome = enrichment_Reactome
ORA_SFARI = enrichment_SFARI
rm(enrichment_GO, enrichment_DO, enrichment_DGN, enrichment_KEGG, enrichment_Reactome)
compare_methods = function(GSEA_list, ORA_list){
for(top_module in top_modules){
cat(paste0(' \n \nEnrichments for Module ', top_module, ' (MTcor=',
round(dataset$MTcor[dataset$Module==top_module][1],2), '): \n \n'))
GSEA = GSEA_list[[top_module]]
ORA = ORA_list[[top_module]]
cat(paste0('GSEA has ', nrow(GSEA), ' enriched terms \n'))
cat(paste0('ORA has ', nrow(ORA), ' enriched terms \n'))
cat(paste0(sum(ORA$ID %in% GSEA$ID), ' terms are enriched in both methods \n \n'))
plot_data = GSEA %>% mutate(pval_GSEA = p.adjust) %>% dplyr::select(ID, Description, NES, pval_GSEA) %>%
inner_join(ORA %>% mutate(pval_ORA = p.adjust) %>%
dplyr::select(ID, pval_ORA, GeneRatio, qvalue), by = 'ID')
if(nrow(plot_data)>0){
print(plot_data %>% mutate(pval_mean = pval_ORA + pval_GSEA) %>%
arrange(pval_mean) %>% dplyr::select(-pval_mean) %>%
kable %>% kable_styling(full_width = F))
}
}
}
plot_results = function(GSEA_list, ORA_list){
l = htmltools::tagList()
for(i in 1:length(top_modules)){
GSEA = GSEA_list[[top_modules[i]]]
ORA = ORA_list[[top_modules[i]]]
plot_data = GSEA %>% mutate(pval_GSEA = p.adjust) %>% dplyr::select(ID, Description, NES, pval_GSEA) %>%
inner_join(ORA %>% mutate(pval_ORA = p.adjust) %>% dplyr::select(ID, pval_ORA), by = 'ID')
if(nrow(plot_data)>5){
min_val = min(min(plot_data$pval_GSEA), min(plot_data$pval_ORA))
max_val = max(max(max(plot_data$pval_GSEA), max(plot_data$pval_ORA)),0.05)
ggp = ggplotly(plot_data %>% ggplot(aes(pval_GSEA, pval_ORA, color = NES)) +
geom_point(aes(id = Description)) +
geom_vline(xintercept = 0.05, color = 'gray', linetype = 'dotted') +
geom_hline(yintercept = 0.05, color = 'gray', linetype = 'dotted') +
ggtitle(paste0('Enriched terms in common for Module ', top_modules[i])) +
scale_x_continuous(limits = c(min_val, max_val)) +
scale_y_continuous(limits = c(min_val, max_val)) +
xlab('Corrected p-value for GSEA') + ylab('Corrected p-value for ORA') +
scale_colour_viridis(direction = -1) + theme_minimal() + coord_fixed())
l[[i]] = ggp
}
}
return(l)
}
compare_methods(GSEA_KEGG, ORA_KEGG)
Enrichments for Module #26B700 (MTcor=-0.18):
GSEA has 24 enriched terms
ORA has 24 enriched terms
19 terms are enriched in both methods
| ID | Description | NES | pval_GSEA | pval_ORA | GeneRatio | qvalue |
|---|---|---|---|---|---|---|
| hsa05152 | Tuberculosis | 2.202978 | 0.0048776 | 0.00e+00 | 8/11 | 0e+00 |
| hsa05164 | Influenza A | 1.856789 | 0.0049026 | 0.00e+00 | 7/11 | 0e+00 |
| hsa04514 | Cell adhesion molecules | 2.304926 | 0.0049519 | 1.10e-06 | 6/11 | 0e+00 |
| hsa04145 | Phagosome | 2.370070 | 0.0049569 | 1.00e-06 | 6/11 | 0e+00 |
| hsa04640 | Hematopoietic cell lineage | 2.363607 | 0.0051608 | 1.00e-07 | 6/11 | 0e+00 |
| hsa05323 | Rheumatoid arthritis | 2.478602 | 0.0051624 | 1.00e-07 | 6/11 | 0e+00 |
| hsa05140 | Leishmaniasis | 2.282038 | 0.0052551 | 0.00e+00 | 6/11 | 0e+00 |
| hsa04612 | Antigen processing and presentation | 2.520062 | 0.0053259 | 0.00e+00 | 8/11 | 0e+00 |
| hsa05150 | Staphylococcus aureus infection | 2.487320 | 0.0053259 | 0.00e+00 | 6/11 | 0e+00 |
| hsa05321 | Inflammatory bowel disease | 2.240381 | 0.0053435 | 0.00e+00 | 6/11 | 0e+00 |
| hsa05416 | Viral myocarditis | 2.416675 | 0.0053435 | 0.00e+00 | 6/11 | 0e+00 |
| hsa04940 | Type I diabetes mellitus | 2.567063 | 0.0054584 | 0.00e+00 | 6/11 | 0e+00 |
| hsa04672 | Intestinal immune network for IgA production | 2.368116 | 0.0054762 | 0.00e+00 | 6/11 | 0e+00 |
| hsa05320 | Autoimmune thyroid disease | 2.197707 | 0.0054762 | 0.00e+00 | 6/11 | 0e+00 |
| hsa05330 | Allograft rejection | 2.428696 | 0.0055213 | 0.00e+00 | 6/11 | 0e+00 |
| hsa05332 | Graft-versus-host disease | 2.559866 | 0.0055360 | 0.00e+00 | 6/11 | 0e+00 |
| hsa05310 | Asthma | 2.590835 | 0.0056600 | 0.00e+00 | 6/11 | 0e+00 |
| hsa05169 | Epstein-Barr virus infection | 1.703062 | 0.0575858 | 6.60e-06 | 6/11 | 1e-07 |
| hsa05166 | Human T-cell leukemia virus 1 infection | 1.671515 | 0.0807498 | 1.11e-05 | 6/11 | 2e-07 |
Enrichments for Module #00BF76 (MTcor=-0.4):
GSEA has 5 enriched terms
ORA has 1 enriched terms
0 terms are enriched in both methods
Enrichments for Module #00BBDC (MTcor=0.35):
GSEA has 4 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods
Enrichments for Module #A68AFF (MTcor=0.24):
GSEA has 25 enriched terms
ORA has 1 enriched terms
1 terms are enriched in both methods
| ID | Description | NES | pval_GSEA | pval_ORA | GeneRatio | qvalue |
|---|---|---|---|---|---|---|
| hsa04360 | Axon guidance | 1.875438 | 0.0090189 | 0.0012325 | 5/13 | 0.0011509 |
Plots of the results when there are more than 5 terms in common between methods:
plot_results(GSEA_KEGG, ORA_KEGG)
compare_methods(GSEA_Reactome, ORA_Reactome)
Enrichments for Module #26B700 (MTcor=-0.18):
GSEA has 24 enriched terms
ORA has 11 enriched terms
7 terms are enriched in both methods
| ID | Description | NES | pval_GSEA | pval_ORA | GeneRatio | qvalue |
|---|---|---|---|---|---|---|
| R-HSA-1280215 | Cytokine Signaling in Immune system | 1.781126 | 0.0162331 | 0.0003201 | 7/13 | 1.83e-05 |
| R-HSA-913531 | Interferon Signaling | 1.890160 | 0.0190785 | 0.0000033 | 6/13 | 2.00e-07 |
| R-HSA-877300 | Interferon gamma signaling | 2.321918 | 0.0202082 | 0.0000001 | 6/13 | 0.00e+00 |
| R-HSA-389948 | PD-1 signaling | 2.265938 | 0.0223332 | 0.0000000 | 5/13 | 0.00e+00 |
| R-HSA-202427 | Phosphorylation of CD3 and TCR zeta chains | 2.267208 | 0.0223668 | 0.0000000 | 5/13 | 0.00e+00 |
| R-HSA-202430 | Translocation of ZAP-70 to Immunological synapse | 2.236699 | 0.0225682 | 0.0000000 | 5/13 | 0.00e+00 |
| R-HSA-202433 | Generation of second messenger molecules | 2.124477 | 0.0652799 | 0.0000000 | 5/13 | 0.00e+00 |
Enrichments for Module #00BF76 (MTcor=-0.4):
GSEA has 50 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods
Enrichments for Module #00BBDC (MTcor=0.35):
GSEA has 19 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods
Enrichments for Module #A68AFF (MTcor=0.24):
GSEA has 36 enriched terms
ORA has 7 enriched terms
1 terms are enriched in both methods
| ID | Description | NES | pval_GSEA | pval_ORA | GeneRatio | qvalue |
|---|---|---|---|---|---|---|
| R-HSA-422475 | Axon guidance | 1.716807 | 0.0154331 | 0.0185591 | 6/16 | 0.0022338 |
Plots of the results when there are more than 5 terms in common between methods:
plot_results(GSEA_Reactome, ORA_Reactome)
compare_methods(GSEA_GO, ORA_GO)
Enrichments for Module #26B700 (MTcor=-0.18):
GSEA has 111 enriched terms
ORA has 86 enriched terms
40 terms are enriched in both methods
| ID | Description | NES | pval_GSEA | pval_ORA | GeneRatio | qvalue |
|---|---|---|---|---|---|---|
| GO:0002764 | immune response-regulating signaling pathway | 2.146861 | 0.0737188 | 0.0021103 | 6/14 | 0.0000725 |
| GO:0002757 | immune response-activating signal transduction | 2.103415 | 0.0744176 | 0.0014164 | 6/14 | 0.0000519 |
| GO:0002253 | activation of immune response | 1.998163 | 0.0730447 | 0.0030187 | 6/14 | 0.0000830 |
| GO:0042110 | T cell activation | 2.031128 | 0.0753165 | 0.0008022 | 6/14 | 0.0000315 |
| GO:0046649 | lymphocyte activation | 1.965198 | 0.0714323 | 0.0070359 | 6/14 | 0.0001612 |
| GO:0050863 | regulation of T cell activation | 2.028704 | 0.0789124 | 0.0032545 | 5/14 | 0.0000852 |
| GO:0019882 | antigen processing and presentation | 1.997324 | 0.0824630 | 0.0000000 | 8/14 | 0.0000000 |
| GO:0002429 | immune response-activating cell surface receptor signaling pathway | 2.168264 | 0.0785318 | 0.0039998 | 5/14 | 0.0001000 |
| GO:0050851 | antigen receptor-mediated signaling pathway | 2.200772 | 0.0823920 | 0.0005383 | 5/14 | 0.0000247 |
| GO:0034341 | response to interferon-gamma | 2.108099 | 0.0836887 | 0.0000046 | 6/14 | 0.0000003 |
| GO:0002768 | immune response-regulating cell surface receptor signaling pathway | 2.201816 | 0.0774725 | 0.0065760 | 5/14 | 0.0001572 |
| GO:0048002 | antigen processing and presentation of peptide antigen | 2.012148 | 0.0842560 | 0.0000000 | 7/14 | 0.0000000 |
| GO:0071346 | cellular response to interferon-gamma | 2.180626 | 0.0848338 | 0.0000023 | 6/14 | 0.0000001 |
| GO:0050852 | T cell receptor signaling pathway | 2.148759 | 0.0848338 | 0.0001467 | 5/14 | 0.0000073 |
| GO:0051249 | regulation of lymphocyte activation | 1.954367 | 0.0761214 | 0.0125035 | 5/14 | 0.0002751 |
| GO:0060333 | interferon-gamma-mediated signaling pathway | 2.363246 | 0.0888672 | 0.0000001 | 6/14 | 0.0000000 |
| GO:0002504 | antigen processing and presentation of peptide or polysaccharide antigen via MHC class II | 2.129769 | 0.0894793 | 0.0000000 | 7/14 | 0.0000000 |
| GO:0002495 | antigen processing and presentation of peptide antigen via MHC class II | 2.147117 | 0.0895351 | 0.0000000 | 7/14 | 0.0000000 |
| GO:0070665 | positive regulation of leukocyte proliferation | 2.123407 | 0.0867491 | 0.0028635 | 4/14 | 0.0000829 |
| GO:0050671 | positive regulation of lymphocyte proliferation | 2.133298 | 0.0871329 | 0.0025323 | 4/14 | 0.0000806 |
| GO:0032946 | positive regulation of mononuclear cell proliferation | 2.126081 | 0.0870648 | 0.0026393 | 4/14 | 0.0000806 |
| GO:0050670 | regulation of lymphocyte proliferation | 2.086918 | 0.0830366 | 0.0148736 | 4/14 | 0.0002895 |
| GO:0050870 | positive regulation of T cell activation | 2.246731 | 0.0830366 | 0.0148736 | 4/14 | 0.0002895 |
| GO:0032944 | regulation of mononuclear cell proliferation | 2.080818 | 0.0829419 | 0.0152677 | 4/14 | 0.0002895 |
| GO:0070663 | regulation of leukocyte proliferation | 2.059693 | 0.0824630 | 0.0182452 | 4/14 | 0.0003237 |
| GO:1903039 | positive regulation of leukocyte cell-cell adhesion | 2.305850 | 0.0823920 | 0.0196439 | 4/14 | 0.0003376 |
| GO:0002694 | regulation of leukocyte activation | 1.956432 | 0.0743770 | 0.0299637 | 5/14 | 0.0004994 |
| GO:0050865 | regulation of cell activation | 1.898168 | 0.0736909 | 0.0415454 | 5/14 | 0.0005712 |
| GO:0022409 | positive regulation of cell-cell adhesion | 2.198924 | 0.0810048 | 0.0352927 | 4/14 | 0.0005661 |
| GO:0046651 | lymphocyte proliferation | 2.046325 | 0.0807602 | 0.0375531 | 4/14 | 0.0005661 |
| GO:0032943 | mononuclear cell proliferation | 2.056501 | 0.0806743 | 0.0391177 | 4/14 | 0.0005661 |
| GO:0051251 | positive regulation of lymphocyte activation | 2.131962 | 0.0805290 | 0.0440986 | 4/14 | 0.0005735 |
| GO:0042102 | positive regulation of T cell proliferation | 2.128206 | 0.0899588 | 0.0390238 | 3/14 | 0.0005661 |
| GO:0070661 | leukocyte proliferation | 2.020797 | 0.0800622 | 0.0504787 | 4/14 | 0.0006169 |
| GO:0032649 | regulation of interferon-gamma production | 2.085140 | 0.0898802 | 0.0407956 | 3/14 | 0.0005712 |
| GO:0032609 | interferon-gamma production | 2.112378 | 0.0894793 | 0.0504481 | 3/14 | 0.0006169 |
| GO:1903037 | regulation of leukocyte cell-cell adhesion | 2.158327 | 0.0796368 | 0.0629503 | 4/14 | 0.0007526 |
| GO:0002696 | positive regulation of leukocyte activation | 2.201813 | 0.0791574 | 0.0762325 | 4/14 | 0.0008920 |
| GO:0050867 | positive regulation of cell activation | 2.201281 | 0.0786791 | 0.0899816 | 4/14 | 0.0010310 |
| GO:0007159 | leukocyte cell-cell adhesion | 2.126880 | 0.0785318 | 0.0959563 | 4/14 | 0.0010770 |
Enrichments for Module #00BF76 (MTcor=-0.4):
GSEA has 21 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods
Enrichments for Module #00BBDC (MTcor=0.35):
GSEA has 12 enriched terms
ORA has 5 enriched terms
0 terms are enriched in both methods
Enrichments for Module #A68AFF (MTcor=0.24):
GSEA has 51 enriched terms
ORA has 3 enriched terms
0 terms are enriched in both methods
Plots of the results when there are more than 5 terms in common between methods:
plot_results(GSEA_GO, ORA_GO)
compare_methods(GSEA_DO, ORA_DO)
Enrichments for Module #26B700 (MTcor=-0.18):
GSEA has 30 enriched terms
ORA has 17 enriched terms
8 terms are enriched in both methods
| ID | Description | NES | pval_GSEA | pval_ORA | GeneRatio | qvalue |
|---|---|---|---|---|---|---|
| DOID:3118 | hepatobiliary disease | 1.800729 | 0.0098052 | 0.0001776 | 8/12 | 0.0000302 |
| DOID:409 | liver disease | 1.833261 | 0.0098729 | 0.0001279 | 8/12 | 0.0000290 |
| DOID:2237 | hepatitis | 2.036511 | 0.0103409 | 0.0000093 | 8/12 | 0.0000032 |
| DOID:2043 | hepatitis B | 2.070166 | 0.0113925 | 0.0000000 | 8/12 | 0.0000000 |
| DOID:2377 | multiple sclerosis | 2.033268 | 0.0115065 | 0.0010006 | 5/12 | 0.0001135 |
| DOID:3213 | demyelinating disease | 2.026508 | 0.0114709 | 0.0012071 | 5/12 | 0.0001174 |
| DOID:12361 | Graves’ disease | 2.182370 | 0.0122352 | 0.0699226 | 3/12 | 0.0033989 |
| DOID:0060005 | autoimmune disease of endocrine system | 2.082962 | 0.0121867 | 0.0870863 | 3/12 | 0.0039149 |
Enrichments for Module #00BF76 (MTcor=-0.4):
GSEA has 0 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods
Enrichments for Module #00BBDC (MTcor=0.35):
GSEA has 2 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods
Enrichments for Module #A68AFF (MTcor=0.24):
GSEA has 4 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods
Plots of the results when there are more than 5 terms in common between methods:
plot_results(GSEA_DO, ORA_DO)
compare_methods(GSEA_DGN, ORA_DGN)
Enrichments for Module #26B700 (MTcor=-0.18):
GSEA has 58 enriched terms
ORA has 63 enriched terms
15 terms are enriched in both methods
| ID | Description | NES | pval_GSEA | pval_ORA | GeneRatio | qvalue |
|---|---|---|---|---|---|---|
| umls:C0014072 | Experimental Autoimmune Encephalomyelitis | 1.713611 | 0.0448744 | 0.0006127 | 6/14 | 0.0000165 |
| umls:C0007570 | Celiac Disease | 1.798176 | 0.0455368 | 0.0000079 | 7/14 | 0.0000004 |
| umls:C0036202 | Sarcoidosis | 1.906604 | 0.0473033 | 0.0000506 | 6/14 | 0.0000017 |
| umls:C0038013 | Ankylosing spondylitis | 1.854257 | 0.0475532 | 0.0016618 | 5/14 | 0.0000395 |
| umls:C0026896 | Myasthenia Gravis | 1.897274 | 0.0490831 | 0.0003814 | 5/14 | 0.0000110 |
| umls:C3495559 | Juvenile arthritis | 1.745540 | 0.0455368 | 0.0083627 | 5/14 | 0.0001301 |
| umls:C0242583 | Bare Lymphocyte Syndrome | 2.280483 | 0.0565284 | 0.0000000 | 6/14 | 0.0000000 |
| umls:C2931418 | Bare lymphocyte syndrome 2 | 2.280483 | 0.0565284 | 0.0000000 | 6/14 | 0.0000000 |
| umls:C0005138 | Berylliosis | 2.278758 | 0.0576906 | 0.0000019 | 4/14 | 0.0000001 |
| umls:C0020517 | Hypersensitivity | 2.202802 | 0.0499555 | 0.0078476 | 4/14 | 0.0001269 |
| umls:C0376545 | Hematologic Neoplasms | 1.679383 | 0.0439095 | 0.0335246 | 5/14 | 0.0003307 |
| umls:C0041327 | Tuberculosis, Pulmonary | 1.919842 | 0.0483386 | 0.0297397 | 4/14 | 0.0003084 |
| umls:C0035455 | Rhinitis | 2.356729 | 0.0523883 | 0.0626667 | 3/14 | 0.0004822 |
| umls:C0036421 | Systemic Scleroderma | 1.652695 | 0.0430287 | 0.0722192 | 5/14 | 0.0005284 |
| umls:C0018133 | Graft-vs-Host Disease | 1.772540 | 0.0932989 | 0.0906218 | 4/14 | 0.0005817 |
Enrichments for Module #00BF76 (MTcor=-0.4):
GSEA has 1 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods
Enrichments for Module #00BBDC (MTcor=0.35):
GSEA has 2 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods
Enrichments for Module #A68AFF (MTcor=0.24):
GSEA has 13 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods
Plots of the results when there are more than 5 terms in common between methods:
plot_results(GSEA_DGN, ORA_DGN)
sessionInfo()
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.4 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
##
## locale:
## [1] LC_CTYPE=en_GB.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_GB.UTF-8 LC_COLLATE=en_GB.UTF-8
## [5] LC_MONETARY=en_GB.UTF-8 LC_MESSAGES=en_GB.UTF-8
## [7] LC_PAPER=en_GB.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 parallel stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] WGCNA_1.69 fastcluster_1.1.25 dynamicTreeCut_1.63-1
## [4] org.Hs.eg.db_3.8.2 AnnotationDbi_1.46.1 IRanges_2.18.3
## [7] S4Vectors_0.22.1 Biobase_2.44.0 BiocGenerics_0.30.0
## [10] DOSE_3.10.2 ReactomePA_1.28.0 clusterProfiler_3.12.0
## [13] biomaRt_2.40.5 kableExtra_1.1.0 knitr_1.28
## [16] doParallel_1.0.15 iterators_1.0.12 foreach_1.5.0
## [19] polycor_0.7-10 expss_0.10.2 ggpubr_0.2.5
## [22] magrittr_1.5 GGally_1.5.0 gridExtra_2.3
## [25] viridis_0.5.1 viridisLite_0.3.0 RColorBrewer_1.1-2
## [28] dendextend_1.13.4 plotly_4.9.2 glue_1.4.1
## [31] reshape2_1.4.4 forcats_0.5.0 stringr_1.4.0
## [34] dplyr_1.0.0 purrr_0.3.4 readr_1.3.1
## [37] tidyr_1.1.0 tibble_3.0.1 ggplot2_3.3.2
## [40] tidyverse_1.3.0
##
## loaded via a namespace (and not attached):
## [1] readxl_1.3.1 backports_1.1.8 Hmisc_4.4-0
## [4] fastmatch_1.1-0 plyr_1.8.6 igraph_1.2.5
## [7] lazyeval_0.2.2 splines_3.6.3 crosstalk_1.1.0.1
## [10] BiocParallel_1.18.1 urltools_1.7.3 digest_0.6.25
## [13] htmltools_0.4.0 GOSemSim_2.10.0 GO.db_3.8.2
## [16] fansi_0.4.1 checkmate_2.0.0 memoise_1.1.0
## [19] cluster_2.1.0 graphlayouts_0.7.0 modelr_0.1.6
## [22] matrixStats_0.56.0 enrichplot_1.4.0 prettyunits_1.1.1
## [25] jpeg_0.1-8.1 colorspace_1.4-1 rappdirs_0.3.1
## [28] blob_1.2.1 rvest_0.3.5 ggrepel_0.8.2
## [31] haven_2.2.0 xfun_0.12 crayon_1.3.4
## [34] RCurl_1.98-1.2 jsonlite_1.7.0 graph_1.62.0
## [37] impute_1.58.0 survival_3.1-12 polyclip_1.10-0
## [40] gtable_0.3.0 webshot_0.5.2 UpSetR_1.4.0
## [43] graphite_1.30.0 scales_1.1.1 DBI_1.1.0
## [46] Rcpp_1.0.4.6 progress_1.2.2 htmlTable_1.13.3
## [49] gridGraphics_0.5-0 reactome.db_1.68.0 foreign_0.8-76
## [52] bit_1.1-15.2 europepmc_0.4 preprocessCore_1.46.0
## [55] Formula_1.2-3 htmlwidgets_1.5.1 httr_1.4.1
## [58] fgsea_1.10.1 acepack_1.4.1 ellipsis_0.3.1
## [61] pkgconfig_2.0.3 reshape_0.8.8 XML_3.99-0.3
## [64] farver_2.0.3 nnet_7.3-14 dbplyr_1.4.2
## [67] labeling_0.3 ggplotify_0.0.5 tidyselect_1.1.0
## [70] rlang_0.4.6 munsell_0.5.0 cellranger_1.1.0
## [73] tools_3.6.3 cli_2.0.2 generics_0.0.2
## [76] RSQLite_2.2.0 ggridges_0.5.2 broom_0.5.5
## [79] evaluate_0.14 yaml_2.2.1 bit64_0.9-7
## [82] fs_1.4.0 tidygraph_1.2.0 ggraph_2.0.3
## [85] nlme_3.1-147 DO.db_2.9 xml2_1.2.5
## [88] compiler_3.6.3 rstudioapi_0.11 curl_4.3
## [91] png_0.1-7 ggsignif_0.6.0 reprex_0.3.0
## [94] tweenr_1.0.1 stringi_1.4.6 highr_0.8
## [97] lattice_0.20-41 Matrix_1.2-18 vctrs_0.3.1
## [100] pillar_1.4.4 lifecycle_0.2.0 BiocManager_1.30.10
## [103] triebeard_0.3.0 data.table_1.12.8 cowplot_1.0.0
## [106] bitops_1.0-6 qvalue_2.16.0 latticeExtra_0.6-29
## [109] R6_2.4.1 codetools_0.2-16 MASS_7.3-51.6
## [112] assertthat_0.2.1 withr_2.2.0 mgcv_1.8-31
## [115] hms_0.5.3 rpart_4.1-15 grid_3.6.3
## [118] rvcheck_0.1.8 rmarkdown_2.1 ggforce_0.3.1
## [121] base64enc_0.1-3 lubridate_1.7.4